Load the required libraries.
library(tidyverse)
library(sf)
library(here)
library(readxl)
library(scales)
library(DT)
library(brms)
library(tidybayes)
library(patchwork)
library(marginaleffects)
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(units)
library(glue)
library(ggh4x)
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, re_formula,...){
#labeller for years
year_labels <- c(1950:1964) #extra year for the extant of the x-axis
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}, re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf"), re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
p <- summary %>%
droplevels() %>%
ggplot() +
geom_vline(aes(xintercept=acf_start, linetype="Mass CXR screening intervention")) +
geom_vline(aes(xintercept=acf_end, linetype="Mass CXR screening intervention")) +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}} %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")),
aes(y={{outcome}}, x=year2, shape=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), size=2) +
theme_grey() +
scale_y_continuous(labels=comma, limits =c(0,400)) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_shape_discrete(name="Empirical data (period):", na.translate = F) +
scale_linetype_manual(values = 2, name="") +
labs(
x = "",
y = "CNR (per 100,000)"
) +
guides(x = "axis_truncated", y = "axis_truncated") +
theme(legend.position = "bottom",
legend.box="vertical",
text = element_text(size=10),
axis.text.x = element_text(size=10, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=10),
legend.spacing.y = unit(0.1, 'cm'),
axis.line = element_line(colour = "black"))
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time (RR.peak, RR.level, RR.slope)
summarise_change <- function(model_data, model, population_denominator, grouping_var = NULL, re_formula = NULL) {
#functions for calculating RR.peak
#i.e. relative case notification rate in 1957 vs. counterfactual trend for 1957
grouping_var <- enquo(grouping_var)
if (!is.null({{grouping_var}})) {
#make the prediction matrix, conditional on whether we want random effects included or not.
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
} else {
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
}
peak_draws <- add_epred_draws(newdata = out,
object = {{model}},
re_formula = {{re_formula}}) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
peak_summary <- peak_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#functions for calculating RR.level
#i.e. relative case notification rate in 1958 vs. counterfactual trend for 1958
if (!is.null({{grouping_var}})) {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
level_draws <- add_epred_draws(newdata = out2,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num, .draw) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.level")
level_summary <- level_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#functions for calculating RR.slope
#i.e. relative change in case notification rate in 1958-1963 vs. counterfactual trend for 1959-1963
if (!is.null({{grouping_var}})) {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
slope_draws <- add_epred_draws(newdata = out3,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num) %>%
ungroup() %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, y_num, !!grouping_var) %>%
summarise(slope = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
slope_summary <- slope_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.slope")
#gather all the results into a named list
lst(peak_draws=peak_draws, peak_summary=peak_summary,
level_draws=level_draws, level_summary=level_summary,
slope_draws=slope_draws, slope_summary=slope_summary)
}
Function for calculating difference from counterfactual
calculate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL, re_formula=NA){
#effect vs. counterfactual
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf, {{grouping_var}})
#Calcuate case notification rate per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc, {{grouping_var}})
#for the overall period
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred, {{grouping_var}}) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred_counterf = sum(.epred))
#Calcuate case notification rate per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup()
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
tidy_counterfactuals <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
tidy_counterfactuals_overall <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source
`/Users/peter.macpherson/Library/Mobile Documents/com~apple~CloudDocs/Documents/Documents - Peter’s Mac mini/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson'
using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/peter.macpherson/Library/Mobile Documents/com~apple~CloudDocs/Documents/Documents - Peter’s Mac mini/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotland with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey() +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold")))
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
ggsave(here("figures/s1.tiff"), height=10, width = 12)
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Load in the datasets for denonomiators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division)) +
geom_point(aes(y=total_population, x=year2, colour=division), colour="black") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
x = "",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s3.tiff"), height=14, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning in summary.lm(m_age_sex) :
essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-2.107e-10 -7.560e-13 0.000e+00 0.000e+00 2.107e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 3.820e-10 1.367e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 7.621e-10 -1.055e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 4.679e-10 7.843e+13 <2e-16 ***
age15 to 24 -3.893e+03 4.679e-10 -8.320e+12 <2e-16 ***
age25 to 34 -3.996e+04 4.679e-10 -8.540e+13 <2e-16 ***
age35 to 44 -4.230e+04 4.679e-10 -9.040e+13 <2e-16 ***
age45 to 59 5.459e+04 4.411e-10 1.238e+14 <2e-16 ***
age60 & up 7.533e+04 4.126e-10 1.826e+14 <2e-16 ***
sexmale 3.374e+03 5.402e-10 6.244e+12 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 9.334e-10 -1.996e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 9.334e-10 8.070e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 9.334e-10 1.420e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 9.334e-10 1.479e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 8.800e-10 3.948e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 8.232e-10 -1.027e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 1.078e-09 -1.850e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 6.617e-10 1.592e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 6.617e-10 3.555e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 6.617e-10 2.047e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 6.617e-10 -2.611e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 6.238e-10 4.446e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 5.835e-10 -1.330e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 1.320e-09 -1.552e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 1.320e-09 -5.136e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 1.320e-09 -2.882e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 1.320e-09 -8.874e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 1.244e-09 -2.791e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 1.164e-09 9.071e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.755e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 1.714e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
Population pyramids
label_abs <- function(x) {
comma(abs(x))
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="black", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s4.tiff"), width=10)
Saving 10 x 4.51 in image
Not perfect, but resonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)`
`summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
#shift year to midpoint
cases_by_ward <- cases_by_ward %>%
mutate(year2 = year+0.5)
cases_by_ward %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.8) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
What percentage of adults (15+ participated in the intervention in 1957)?
Note that in the Report of Sir Kenneth Cowan, we have the following estimates of participation (we will use these for the manuscript, as they are not based on my estimates)
male_adult_resident_participation <- 281875
female_adult_resident_participation <- 340474
male_adult_resident_population <- 381713
female_adult_resident_population <- 437588
#overall participation
(male_adult_resident_participation+female_adult_resident_participation)/(male_adult_resident_population+female_adult_resident_population)
[1] 0.7596097
#male participation
male_adult_resident_participation/male_adult_resident_population
[1] 0.7384475
#female participation
female_adult_resident_participation/female_adult_resident_population
[1] 0.7780698
Look at uptake of screening by age and sex
uptake_age_sex <- read_xlsx("2024-03-26_mass_xray_uptake.xlsx", sheet = "uptake_age_sex")
uptake_graph <- uptake_age_sex %>%
mutate(uptake = examined/resident_population) %>%
mutate(examined_l = comma(examined),
resident_population_l = comma(resident_population),
uptake_l = percent(uptake, accuracy=0.1)) %>%
mutate(label = glue("{examined_l}/{resident_population_l} ({uptake_l})")) %>%
filter(age !="00-14") %>%
mutate(sex = case_when(sex=="m" ~ "Male",
sex=="f" ~ "Female")) %>%
ggplot(aes(x=age, y=uptake, group=sex, fill=sex)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label=uptake_l), position = position_dodge(width=0.85),vjust=2) +
scale_y_continuous(labels=percent) +
scale_fill_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
Combine figure with table for single figure.
uptake_table <- uptake_age_sex %>%
mutate(resident_population = comma(resident_population),
examined = comma(examined)) %>%
rename(Sex = sex,
Age = age,
`Resident population` = resident_population,
Examined = examined) %>%
mutate(Sex = case_when(Sex=="m" ~ "Male",
Sex=="f" ~ "Female"))
uptake_graph / gridExtra::tableGrob(uptake_table, rows = NULL)
ggsave(here("figures/s5.tiff"), height=10)
Saving 7.29 x 10 in image
Uptake by division
uptake_division <- read_xlsx("2024-03-26_mass_xray_uptake.xlsx", sheet = "uptake_division")
division_pops %>%
filter(year==1957) %>%
select(division, population_without_inst_ship) %>%
left_join(uptake_division) %>%
mutate(pct_pop_examined = examined/population_without_inst_ship)
Joining with `by = join_by(division)`
Now calculate case notification rates per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
overall_inc <- overall_pops %>%
left_join(cases_by_year)
Joining with `by = join_by(year, year2)`
overall_inc <- overall_inc %>%
mutate(inc_pulm_100k = pulmonary_notifications/total_population*100000,
inc_ep_100k = `non-pulmonary_notifications`/total_population*100000,
inc_100k = total_notifications/total_population*100000)
overall_inc %>%
select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
mutate_at(.vars = vars(inc_100k, inc_pulm_100k, inc_ep_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
overall_inc %>%
select(year2, inc_pulm_100k, inc_ep_100k) %>%
pivot_longer(cols = c(inc_pulm_100k, `inc_ep_100k`)) %>%
mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Change in case notification rates pre-intervention
#pre-ACF
overall_inc %>%
filter(year %in% 1950:1956) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/7)*100)
#post-ACF
overall_inc %>%
filter(year %in% 1958:1963) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/6)*100)
NA
division_inc <- division_pops %>%
left_join(cases_by_division)
Joining with `by = join_by(division, year)`
division_inc <- division_inc %>%
mutate(inc_100k = cases/total_population*100000)
division_inc %>%
select(year, division, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
division_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
ward_inc <- ward_pops %>%
left_join(cases_by_ward)
Joining with `by = join_by(ward, year, year2)`
ward_inc <- ward_inc %>%
mutate(inc_100k = cases/population_without_inst_ship*100000)
ward_inc %>%
select(year, ward, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ward_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Incidence (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(legend.position = "top",
legend.key.width = unit(2, "cm"),
panel.border = element_rect(colour = "grey78", fill=NA)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
#get the overall mortality sheets
deaths_sheets <- enframe(all_sheets) %>%
filter(grepl("deaths", value)) %>%
pull(value)
overall_deaths <- map_df(deaths_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
overall_deaths %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_deaths)) +
geom_line(colour = "#DE0D92") +
geom_point(colour = "#DE0D92") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
labs(y="Pulmonary TB deaths per year",
x = "Year",
title = "Numbers of pulmonary TB deaths",
subtitle = "Glasgow, 1950-1963",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).
Now the incidence of pulmonary TB death
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_death_rate_per_100k)) +
geom_line(colour = "#4D6CFA") +
geom_point(colour = "#4D6CFA") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(y="Annual incidence of death (per 100,000)",
x = "Year",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/deaths.tiff"), width=10)
Saving 10 x 4.51 in image
Make Table 1 here, and save for publication.
overall_pops %>%
select(year, total_population) %>%
left_join(overall_inc %>%
select(year,
pulmonary_notifications, inc_pulm_100k,
`non-pulmonary_notifications`, inc_ep_100k,
total_notifications, inc_100k)) %>%
left_join(overall_deaths %>%
select(year,
pulmonary_deaths, pulmonary_death_rate_per_100k)) %>%
mutate(percent_pulmonary = percent(pulmonary_notifications/(total_notifications ), accuracy=0.1)) %>%
mutate(across(where(is.numeric) & !(year), ~round(., digits=1))) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.)))
Joining with `by = join_by(year)`
Joining with `by = join_by(year)`
Comparison fo age-sex distribution of cases in 1950-1956 vs. 1957
label_abs2 <- function(x) {
percent(abs(x))
}
cases_by_age_sex %>%
ungroup() %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(acf_period, age, sex) %>%
summarise(cases = sum(cases)) %>%
ungroup() %>%
group_by(acf_period) %>%
mutate(period_total = sum(cases)) %>%
mutate(pct = cases/period_total) %>%
mutate(pct2 = case_when(sex=="F" ~ -pct,
TRUE ~ pct)) %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")) %>%
ggplot() +
geom_vline(aes(xintercept=0), linetype=2) +
geom_point(aes(x=pct2,y=age, colour=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), stat="identity") +
scale_x_continuous(labels=label_abs2, limits = c(-0.2, 0.2)) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA")) +
theme_grey(base_family = "Aptos") +
labs(x= "<- Female Percent of cases Male ->",
y="") +
theme(legend.title = element_blank(),
legend.position = "bottom")
`summarise()` has grouped output by 'acf_period', 'age'. You can override using the `.groups` argument.
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
ggsave(here("figures/s6.tiff"))
Saving 7.29 x 4.51 in image
Prepare the datasets for modelling
mdata <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
mdata_extrapulmonary <- ward_inc %>%
filter(tb_type=="Non-Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup() %>%
filter(year<=1961) #no data for 1962 and 1963
#scaffold for overall predictions
overall_scaffold <- mdata %>%
select(year, year2, y_num, acf_period, population_without_inst_ship, ward, cases) %>%
group_by(year, year2, y_num, acf_period) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship),
cases = sum(cases)) %>%
ungroup() %>%
mutate(inc_100k = cases/population_without_inst_ship*100000) %>%
left_join(mdata_extrapulmonary %>% group_by(year) %>%
summarise(cases_extrapulmonary = sum(cases))) %>%
mutate(inc_100k_extrapulmonary = cases_extrapulmonary/population_without_inst_ship*100000)
`summarise()` has grouped output by 'year', 'year2', 'y_num'. You can override using the `.groups` argument.
Joining with `by = join_by(year)`
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of counts per year
mean(mdata$cases)
[1] 48.32819
#variance of counts per year
var(mdata$cases)
[1] 915.5749
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Fit the model with the data
m_pulmonary <- brm(
cases ~ 0 + Intercept + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata,
family = negbinomial(),
seed = 1234,
threads = threading(2, grainsize = 100, static = TRUE), #for exact reproducibility
backend = "cmdstanr",
chains = 4, cores = 4,
prior = prior(normal(0,1), class=b, coef = "Intercept") +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(4), class=cor),
control = list(adapt_delta = 0.9))
Model executable is up to date!
Start sampling
Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 51.5 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 finished in 52.7 seconds.
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 54.7 seconds.
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 66.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 56.3 seconds.
Total execution time: 66.5 seconds.
#check model diagnostics
summary(m_pulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 0 + Intercept + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Multilevel Hyperparameters:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.25 0.04 0.19 0.33 1.00 1315 1966
sd(y_num) 0.02 0.01 0.01 0.03 1.01 723 477
sd(acf_periodb.acf) 0.06 0.04 0.00 0.16 1.00 1722 2034
sd(acf_periodc.postMacf) 0.12 0.06 0.01 0.25 1.00 569 912
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.02 1.00 1546 1922
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.02 1.00 539 1484
cor(Intercept,y_num) -0.45 0.19 -0.75 0.00 1.00 2120 2062
cor(Intercept,acf_periodb.acf) -0.23 0.29 -0.71 0.37 1.00 3239 3178
cor(y_num,acf_periodb.acf) -0.04 0.27 -0.56 0.50 1.00 5062 2991
cor(Intercept,acf_periodc.postMacf) -0.14 0.25 -0.58 0.39 1.00 3544 2296
cor(y_num,acf_periodc.postMacf) 0.10 0.26 -0.42 0.58 1.00 3117 2865
cor(acf_periodb.acf,acf_periodc.postMacf) 0.06 0.27 -0.48 0.57 1.00 2099 3051
cor(Intercept,y_num:acf_periodb.acf) -0.24 0.28 -0.71 0.35 1.00 3201 3417
cor(y_num,y_num:acf_periodb.acf) -0.04 0.27 -0.54 0.48 1.00 4993 3153
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.06 0.28 -0.58 0.49 1.00 4171 3668
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.07 0.28 -0.48 0.59 1.00 4087 3237
cor(Intercept,y_num:acf_periodc.postMacf) -0.02 0.25 -0.50 0.47 1.00 4234 2785
cor(y_num,y_num:acf_periodc.postMacf) -0.03 0.27 -0.54 0.48 1.00 2650 2851
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.05 0.27 -0.47 0.55 1.00 2124 2721
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.29 -0.62 0.49 1.00 2255 2817
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.05 0.28 -0.48 0.58 1.00 2090 2548
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.14 0.05 -6.24 -6.04 1.00 813 1321
y_num -0.02 0.01 -0.03 -0.01 1.00 2386 2639
acf_periodb.acf 0.02 1.00 -1.98 2.05 1.00 3425 2622
acf_periodc.postMacf 0.04 0.11 -0.17 0.24 1.00 3644 2947
y_num:acf_periodb.acf 0.08 0.13 -0.17 0.33 1.00 3406 2500
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.03 1.00 3147 2860
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 92.28 20.75 61.22 139.67 1.00 3104 2350
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_pulmonary)
pp_check(m_pulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
prior_summary(m_pulmonary)
prior class coef group resp dpar nlpar lb ub source
normal(0, 1) b user
normal(0, 1) b acf_periodb.acf (vectorized)
normal(0, 1) b acf_periodc.postMacf (vectorized)
normal(0, 1) b Intercept user
normal(0, 1) b y_num (vectorized)
normal(0, 1) b y_num:acf_periodb.acf (vectorized)
normal(0, 1) b y_num:acf_periodc.postMacf (vectorized)
lkj_corr_cholesky(4) L user
lkj_corr_cholesky(4) L ward (vectorized)
exponential(1) sd 0 user
exponential(1) sd ward 0 (vectorized)
exponential(1) sd acf_periodb.acf ward 0 (vectorized)
exponential(1) sd acf_periodc.postMacf ward 0 (vectorized)
exponential(1) sd Intercept ward 0 (vectorized)
exponential(1) sd y_num ward 0 (vectorized)
exponential(1) sd y_num:acf_periodb.acf ward 0 (vectorized)
exponential(1) sd y_num:acf_periodc.postMacf ward 0 (vectorized)
gamma(0.01, 0.01) shape 0 user
Nicer version of trace plots for supplemental material
as_draws_df(m_pulmonary) %>%
bayesplot::mcmc_rank_overlay(pars = vars(b_Intercept:shape),
facet_args = list(ncol = 4)) +
scale_colour_scico_d(palette = "managua", name = "Chain") +
theme_ggdist()+
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "top")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
ggsave(here("figures/s8.tiff"), width=16, height=16)
Nicer version of table of parameters for supplement
summarise_draws(m_pulmonary) %>%
mutate(across(c(mean:ess_tail), comma, accuracy=0.01)) %>%
write_csv(here("figures/s9_table.csv"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(mean:ess_tail), comma, accuracy = 0.01)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Summarise the posterior in graphical form
f1b <- plot_counterfactual(model_data = overall_scaffold, model = m_pulmonary,
population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var=NULL,
re_formula = NA)
f1b
Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
ℹ It has been replaced by a ggproto system that can be extended.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Make this into a figure combined with the map of empirical data
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k), colour="grey98", lwd=0.01) +
facet_wrap(year~., ncol = 7) +
scale_fill_scico(name="CNR (per 100,000)",
palette = "acton", direction = -1) +
theme_grey() +
theme(legend.position = "top",
#legend.key.width = unit(1, "cm"),
legend.title.align = 0.5,
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
legend.title = element_text(size=10))
Joining with `by = join_by(division, ward, ward_number)`
Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2 3.5.0.
ℹ Please use theme(legend.title = element_text(hjust)) instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
(f1a / f1b) + plot_annotation(tag_levels = "A")
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/f1.tiff"), width=7, height=8)
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Summary of change in notifications numerically
overall_change <- summarise_change(model_data=overall_scaffold, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
Numbers of pulmonary TB cases averted compared to counterfactual per year.
Total pulmonary TB cases averted between 1958 and 1963
What are the correlations between peak, level, and slope?
#RR.peak histogram
a <- overall_change$peak_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.peak",
y="")
#RR. level histogram
b <- overall_change$level_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.level",
y="")
#RR.slope histogram
c <- overall_change$slope_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
#scale_x_continuous(limits = c(0, 6)) +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.slope",
y="")
#Correlation between RR.peak and RR.level
cor_rr_peak_rr_level <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$level_draws$estimate)), digits = 2)
#Correlation between RR.peak and RR.slope
cor_rr_peak_rr_slope <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#Correlation between RR.level and RR.slope
cor_rr_level_rr_slope <- round(cor(pluck(overall_change$level_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#plot of correlation between RR.peak and RR.level
d <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.level =pluck(overall_change$level_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.level)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.2, x=0.58, label=cor_rr_peak_rr_level), colour="firebrick") +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.peak and RR.slope
e <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.1, x=0.65, label=cor_rr_peak_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.level and RR.slope
f <- bind_cols(RR.level=pluck(overall_change$level_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.level, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=0.75, x=0.65, label=cor_rr_level_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
(plot_spacer() + plot_spacer() + c) /
(plot_spacer() + b + f) /
(a + d + e)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 0.75, x = 0.65, label = cor_rr_level_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 2.2, x = 0.58, label = cor_rr_peak_rr_level), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Warning in geom_text(aes(y = 2.1, x = 0.65, label = cor_rr_peak_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
ggsave(here("figures/s10.tiff"), width=8, height=8)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 0.75, x = 0.65, label = cor_rr_level_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 2.2, x = 0.58, label = cor_rr_peak_rr_level), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Warning in geom_text(aes(y = 2.1, x = 0.65, label = cor_rr_peak_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Plot the counterfactual at ward level
plot_counterfactual(model_data = mdata, model=m_pulmonary, outcome = inc_100k, population_denominator = population_without_inst_ship,
grouping_var = ward, ward, re_formula= ~(1 + y_num*acf_period | ward)) +
scale_y_continuous(limits = c(0,500))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s7.tiff"), width=16, height=12)
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
Summary of change in notifications at ward level
Calculate the counterfactual per ward
ward_pulmonary_counterf <- calculate_counterfactual(model_data = mdata, model=m_pulmonary,
population_denominator = population_without_inst_ship,
grouping_var = ward, re_formula=~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
Joining with `by = join_by(year, population_without_inst_ship, .draw, ward)`
Joining with `by = join_by(.draw, ward)`
ward_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Overall counterfactual per ward
ward_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Now we will model the extra-pulmonary TB notification rate. Struggling a bit with negative binomial model, so revert to Poisson.
m_extrapulmonary <- brm(
cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata_extrapulmonary,
family = negbinomial(),
seed = 1234,
threads = threading(2, grainsize = 100, static = TRUE), #for exact reproducibility
backend = "cmdstanr",
chains = 4, cores = 4,
prior = prior(normal(0,1000), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
Start sampling
Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 15.1 seconds.
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 15.7 seconds.
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 17.2 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 17.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 16.4 seconds.
Total execution time: 17.7 seconds.
summary(m_extrapulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 1 + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata_extrapulmonary (Number of observations: 444)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Multilevel Hyperparameters:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.33 0.06 0.22 0.46 1.00 1600 2339
sd(y_num) 0.02 0.01 0.00 0.05 1.01 467 1036
sd(acf_periodb.acf) 0.11 0.09 0.00 0.32 1.00 2508 2149
sd(acf_periodc.postMacf) 0.12 0.09 0.00 0.33 1.00 1316 1996
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.04 1.00 2043 2059
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.04 1.00 860 1438
cor(Intercept,y_num) -0.11 0.31 -0.65 0.54 1.00 2609 2042
cor(Intercept,acf_periodb.acf) -0.01 0.34 -0.63 0.62 1.00 4662 3089
cor(y_num,acf_periodb.acf) -0.01 0.33 -0.63 0.63 1.00 3904 3033
cor(Intercept,acf_periodc.postMacf) -0.07 0.32 -0.67 0.56 1.00 4270 3035
cor(y_num,acf_periodc.postMacf) -0.04 0.33 -0.66 0.60 1.00 3567 3172
cor(acf_periodb.acf,acf_periodc.postMacf) 0.01 0.33 -0.63 0.63 1.00 3026 2760
cor(Intercept,y_num:acf_periodb.acf) -0.02 0.34 -0.64 0.63 1.00 4719 2926
cor(y_num,y_num:acf_periodb.acf) -0.01 0.33 -0.62 0.61 1.00 4299 3235
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.07 0.34 -0.69 0.59 1.00 3414 3130
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.01 0.33 -0.61 0.65 1.00 3516 3208
cor(Intercept,y_num:acf_periodc.postMacf) -0.15 0.31 -0.70 0.49 1.00 3725 3085
cor(y_num,y_num:acf_periodc.postMacf) -0.06 0.33 -0.66 0.61 1.00 2789 2903
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.01 0.33 -0.61 0.64 1.00 2638 2658
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.35 -0.70 0.61 1.00 2337 3033
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.01 0.33 -0.62 0.64 1.00 2602 3009
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -7.92 0.07 -8.07 -7.78 1.00 1581 2598
y_num -0.09 0.01 -0.11 -0.07 1.00 4509 3307
acf_periodb.acf -0.00 0.98 -1.90 1.94 1.00 2635 2429
acf_periodc.postMacf -0.36 0.39 -1.10 0.38 1.00 2799 2850
y_num:acf_periodb.acf -0.01 0.12 -0.26 0.23 1.00 2625 2569
y_num:acf_periodc.postMacf 0.02 0.04 -0.05 0.09 1.00 2712 2675
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 93.46 65.58 27.38 269.02 1.00 3751 3259
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_extrapulmonary)
pp_check(m_extrapulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise in plot
plot_counterfactual(model_data = overall_scaffold %>% filter(year<=1961), model=m_extrapulmonary,
population_denominator = population_without_inst_ship, outcome=inc_100k_extrapulmonary, re_formula = NA) +
scale_y_continuous(limits = c(0,50))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s11.tiff"), width=10)
Saving 10 x 4.51 in image
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Summarise numerically.
Numbers of extra-pulmonary TB cases averted overall.
overall_ep_counterf <- calculate_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary,
population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`
Joining with `by = join_by(.draw)`
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Total extrapulmonary TB cases averted between 1958 and 1963
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Make into Table 2
bind_rows(
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_ward"),
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_overall"),
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB"),
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB overall")
) %>%
select(model, year, diff_inc100k, diff_inc100k.lower:rr_inc100k.upper,
cases_averted:cases_averted.upper,
pct_change:pct_change.upper) %>%
transmute(model=model, year=year,
diff_cnr = glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr = glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"),
cases_averted = glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue("{pct_change} ({pct_change.lower} to {pct_change.upper})")) %>%
write_csv(here("figures/table2.csv"))
Ward-level extra-pulmonary estimates in graphical form.
plot_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary, outcome = inc_100k,
population_denominator = population_without_inst_ship, grouping_var = ward,re_formula =~(y_num*acf_period | ward),
ward) + scale_y_continuous(limits= c(0,75))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s12.tiff"), width=10, height=12)
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
Numerical summary.
ward_change_extrapulmonary <- summarise_change(model_data = mdata_extrapulmonary, model = m_extrapulmonary,
population_denominator = population_without_inst_ship, grouping_var=ward,
re_formula = ~(y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw', 'y_num'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
Fit the model
(Not rewritten the functions for this yet)
mdata_age_sex <- cases_by_age_sex %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(year2 = year+0.5) %>%
group_by(age, sex) %>%
mutate(y_num = row_number()) %>%
ungroup()
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata_age_sex,
family = negbinomial(),
seed = 1234,
threads = threading(2, grainsize = 100, static = TRUE), #for exact reproducibility
backend = "cmdstanr",
chains = 4, cores = 4,
prior = prior(normal(0,1), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b))
Compiling Stan program...
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
Start sampling
Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 20.7 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 21.3 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 21.4 seconds.
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 22.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 21.4 seconds.
Total execution time: 22.3 seconds.
summary(m_age_sex)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + (acf_period) * (age * sex) + (acf_period:y_num) * (age * sex)
Data: mdata_age_sex (Number of observations: 224)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.42 0.12 4.18 4.65 1.00 1197 1845
y_num -0.17 0.03 -0.22 -0.11 1.00 1115 1980
acf_periodb.acf -0.02 1.00 -1.98 1.90 1.00 7584 3042
acf_periodc.postMacf -0.49 0.34 -1.15 0.18 1.00 2231 2993
age06_15 0.64 0.15 0.35 0.94 1.00 1626 2728
age16_25 1.86 0.14 1.59 2.13 1.00 1519 2215
age26_35 1.13 0.14 0.86 1.41 1.00 1542 2736
age36_45 0.28 0.15 -0.01 0.59 1.00 1638 2364
age46_55 -0.62 0.17 -0.95 -0.29 1.00 1843 2641
age56_65 -1.07 0.20 -1.48 -0.68 1.00 2125 2919
age65P -1.63 0.23 -2.08 -1.19 1.00 2909 2586
sexM 0.16 0.15 -0.13 0.47 1.00 1157 2173
age06_15:sexM -0.43 0.21 -0.84 -0.02 1.00 1725 2787
age16_25:sexM -0.58 0.18 -0.94 -0.21 1.00 1469 2310
age26_35:sexM -0.34 0.19 -0.71 0.04 1.00 1630 2414
age36_45:sexM 0.23 0.20 -0.15 0.63 1.00 1672 2734
age46_55:sexM 1.14 0.22 0.72 1.56 1.00 1805 2634
age56_65:sexM 1.13 0.23 0.68 1.59 1.00 1797 2570
age65P:sexM 0.99 0.27 0.47 1.53 1.00 2395 2384
acf_periodb.acf:age06_15 0.02 1.00 -1.91 1.95 1.00 7736 2938
acf_periodc.postMacf:age06_15 -0.59 0.51 -1.59 0.40 1.00 3930 3275
acf_periodb.acf:age16_25 0.02 0.99 -1.91 1.99 1.00 7506 2933
acf_periodc.postMacf:age16_25 0.72 0.44 -0.14 1.58 1.00 3023 3268
acf_periodb.acf:age26_35 0.03 0.98 -1.84 1.98 1.00 8714 2875
acf_periodc.postMacf:age26_35 0.66 0.44 -0.20 1.50 1.00 3328 2962
acf_periodb.acf:age36_45 0.04 0.98 -1.88 1.94 1.00 6844 2951
acf_periodc.postMacf:age36_45 0.74 0.46 -0.17 1.65 1.00 3337 3020
acf_periodb.acf:age46_55 0.06 0.96 -1.81 1.96 1.00 6866 2876
acf_periodc.postMacf:age46_55 0.86 0.49 -0.09 1.82 1.00 3136 2845
acf_periodb.acf:age56_65 0.03 1.00 -1.91 1.99 1.00 7296 2744
acf_periodc.postMacf:age56_65 0.62 0.53 -0.40 1.64 1.00 3668 2874
acf_periodb.acf:age65P 0.05 0.98 -1.86 1.96 1.00 8362 3089
acf_periodc.postMacf:age65P 0.97 0.53 -0.07 2.00 1.00 4386 3439
acf_periodb.acf:sexM -0.01 0.98 -1.94 1.91 1.00 7102 3061
acf_periodc.postMacf:sexM -0.07 0.37 -0.80 0.66 1.00 2847 2849
y_num:acf_periodb.acf -0.10 0.13 -0.35 0.16 1.00 7249 3189
y_num:acf_periodc.postMacf 0.04 0.04 -0.04 0.12 1.00 1739 2586
acf_periodb.acf:age06_15:sexM -0.00 0.97 -1.94 1.89 1.00 6763 3129
acf_periodc.postMacf:age06_15:sexM -0.56 0.63 -1.79 0.70 1.00 4768 3325
acf_periodb.acf:age16_25:sexM 0.02 0.97 -1.90 1.92 1.00 7492 3015
acf_periodc.postMacf:age16_25:sexM 0.66 0.53 -0.38 1.68 1.00 4010 2887
acf_periodb.acf:age26_35:sexM -0.01 0.98 -1.94 1.90 1.00 7291 3014
acf_periodc.postMacf:age26_35:sexM 0.41 0.51 -0.59 1.42 1.00 3941 3455
acf_periodb.acf:age36_45:sexM 0.00 1.00 -1.98 1.93 1.00 9184 2870
acf_periodc.postMacf:age36_45:sexM 0.11 0.56 -1.00 1.22 1.00 4310 2868
acf_periodb.acf:age46_55:sexM 0.00 1.01 -1.96 2.00 1.00 7928 3071
acf_periodc.postMacf:age46_55:sexM 0.66 0.56 -0.45 1.79 1.00 3987 2962
acf_periodb.acf:age56_65:sexM 0.01 1.00 -1.91 1.96 1.00 8321 3017
acf_periodc.postMacf:age56_65:sexM 0.36 0.57 -0.78 1.48 1.00 4839 3205
acf_periodb.acf:age65P:sexM -0.00 1.00 -2.01 1.99 1.00 7452 3023
acf_periodc.postMacf:age65P:sexM 0.26 0.58 -0.85 1.37 1.00 4373 2604
y_num:acf_perioda.preMacf:age06_15 0.02 0.04 -0.06 0.09 1.00 1503 2657
y_num:acf_periodb.acf:age06_15 0.14 0.13 -0.11 0.40 1.00 7445 2650
y_num:acf_periodc.postMacf:age06_15 0.08 0.05 -0.02 0.17 1.00 3532 3361
y_num:acf_perioda.preMacf:age16_25 0.12 0.03 0.06 0.19 1.00 1335 2239
y_num:acf_periodb.acf:age16_25 0.25 0.13 -0.02 0.51 1.00 6976 2891
y_num:acf_periodc.postMacf:age16_25 -0.04 0.04 -0.12 0.04 1.00 2668 3077
y_num:acf_perioda.preMacf:age26_35 0.15 0.03 0.08 0.21 1.00 1292 2718
y_num:acf_periodb.acf:age26_35 0.31 0.13 0.05 0.56 1.00 7542 2977
y_num:acf_periodc.postMacf:age26_35 0.02 0.04 -0.06 0.10 1.00 2784 3047
y_num:acf_perioda.preMacf:age36_45 0.17 0.04 0.09 0.24 1.00 1404 2322
y_num:acf_periodb.acf:age36_45 0.40 0.13 0.14 0.65 1.00 6120 2780
y_num:acf_periodc.postMacf:age36_45 0.06 0.04 -0.03 0.14 1.00 2560 3061
y_num:acf_perioda.preMacf:age46_55 0.19 0.04 0.11 0.27 1.00 1565 2197
y_num:acf_periodb.acf:age46_55 0.44 0.13 0.19 0.69 1.00 6018 2769
y_num:acf_periodc.postMacf:age46_55 0.09 0.04 0.00 0.18 1.00 2778 2862
y_num:acf_perioda.preMacf:age56_65 0.17 0.05 0.08 0.27 1.00 1878 2822
y_num:acf_periodb.acf:age56_65 0.39 0.13 0.13 0.65 1.00 6711 2724
y_num:acf_periodc.postMacf:age56_65 0.11 0.05 0.01 0.20 1.00 3135 2845
y_num:acf_perioda.preMacf:age65P 0.23 0.05 0.13 0.33 1.00 2521 2635
y_num:acf_periodb.acf:age65P 0.43 0.13 0.17 0.68 1.00 7369 2952
y_num:acf_periodc.postMacf:age65P 0.11 0.05 0.01 0.20 1.00 3127 2939
y_num:acf_perioda.preMacf:sexM 0.01 0.04 -0.06 0.09 1.00 1156 2139
y_num:acf_periodb.acf:sexM -0.04 0.14 -0.31 0.23 1.00 6586 3163
y_num:acf_periodc.postMacf:sexM -0.01 0.04 -0.08 0.06 1.00 2149 2702
y_num:acf_perioda.preMacf:age06_15:sexM 0.00 0.05 -0.10 0.10 1.00 1619 2966
y_num:acf_periodb.acf:age06_15:sexM 0.05 0.14 -0.23 0.32 1.00 6445 3103
y_num:acf_periodc.postMacf:age06_15:sexM 0.10 0.06 -0.02 0.21 1.00 3952 3062
y_num:acf_perioda.preMacf:age16_25:sexM 0.00 0.05 -0.09 0.09 1.00 1354 2117
y_num:acf_periodb.acf:age16_25:sexM 0.06 0.14 -0.21 0.33 1.00 6085 3040
y_num:acf_periodc.postMacf:age16_25:sexM -0.01 0.05 -0.11 0.09 1.00 3077 3006
y_num:acf_perioda.preMacf:age26_35:sexM -0.01 0.05 -0.10 0.09 1.00 1497 2292
y_num:acf_periodb.acf:age26_35:sexM 0.06 0.14 -0.22 0.33 1.00 7153 3058
y_num:acf_periodc.postMacf:age26_35:sexM -0.00 0.05 -0.10 0.09 1.00 3037 3292
y_num:acf_perioda.preMacf:age36_45:sexM -0.01 0.05 -0.11 0.08 1.00 1569 2256
y_num:acf_periodb.acf:age36_45:sexM 0.00 0.14 -0.27 0.28 1.00 8022 2850
y_num:acf_periodc.postMacf:age36_45:sexM -0.00 0.05 -0.10 0.10 1.00 3227 3268
y_num:acf_perioda.preMacf:age46_55:sexM -0.01 0.05 -0.11 0.09 1.00 1615 2599
y_num:acf_periodb.acf:age46_55:sexM 0.00 0.14 -0.27 0.28 1.00 6729 2803
y_num:acf_periodc.postMacf:age46_55:sexM -0.06 0.05 -0.17 0.04 1.00 3257 3131
y_num:acf_perioda.preMacf:age56_65:sexM 0.05 0.06 -0.06 0.16 1.00 1593 2653
y_num:acf_periodb.acf:age56_65:sexM 0.08 0.14 -0.20 0.35 1.00 7502 3158
y_num:acf_periodc.postMacf:age56_65:sexM 0.02 0.05 -0.09 0.12 1.00 3499 3156
y_num:acf_perioda.preMacf:age65P:sexM 0.00 0.06 -0.12 0.12 1.00 2115 2525
y_num:acf_periodb.acf:age65P:sexM 0.08 0.14 -0.20 0.36 1.00 6531 2903
y_num:acf_periodc.postMacf:age65P:sexM 0.01 0.05 -0.10 0.12 1.00 3303 2920
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 202.18 67.77 107.63 366.50 1.00 2927 2839
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_age_sex)
pp_check(m_age_sex, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise posterior
#posterior draws, and summarise
age_sex_summary <- mdata_age_sex %>%
select(year, year2, y_num, acf_period, age, sex) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
age_sex_counterfact <-
tibble(year = mdata_age_sex$year,
year2 = mdata_age_sex$year2,
y_num = mdata_age_sex$y_num,
age = mdata_age_sex$age,
sex = mdata_age_sex$sex,
acf_period = factor("a. pre-acf")) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention")) %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female"))
age_sex_summary %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
ggplot() +
geom_ribbon(aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = age_sex_counterfact %>% filter(year>=1956),
aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = age_sex_counterfact %>% filter(year>=1956),
aes(y=.epred, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = mdata_age_sex %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="b. acf" ~ "Counterfactual",
acf_period=="c. post-acf" ~ "Post Intervention"))%>%
mutate(acf_period2 = case_when(acf_period=="Before Intervention" ~ "Pre-ACF",
acf_period=="Counterfactual" ~ "ACF",
acf_period=="Post Intervention" ~ "Post-ACF")),
aes(y=cases, x=year2, shape=fct_relevel(acf_period2,
"Pre-ACF",
"ACF",
"Post-ACF")), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
facet_grid(fct_relevel(age, "65 & up y", "56 to 65y", "46 to 55y", "36 to 45y", "26 to 35y", "16 to 25y", "06 to 15y", "0 to 5y")~sex,
scales = "free_y") +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:") +
scale_shape_discrete(name="Emprical data (period):", na.translate = F) +
labs(
x = "Year",
y = "Case notifications (n)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
legend.box="vertical",
panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s14.tiff"), height=10)
Saving 7.29 x 10 in image
Calculate summary effects
#Peak
out_age_sex_1 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf"))
peak_draws_age_sex <- add_epred_draws(newdata = out_age_sex_1,
object = m_age_sex) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
peak_summary_age_sex <- peak_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#Level
out_age_sex_2 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf"))
level_draws_age_sex <- add_epred_draws(newdata = out_age_sex_2,
object = m_age_sex) %>%
arrange(y_num, .draw) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.level")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
level_summary_age_sex <- level_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#Slope
out_age_sex_3 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf"))
slope_draws_age_sex <- add_epred_draws(newdata = out_age_sex_3,
object = m_age_sex) %>%
arrange(y_num) %>%
ungroup() %>%
group_by(.draw, y_num, age, sex) %>%
summarise(slope = last(.epred)/first(.epred)) %>%
ungroup() %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
`summarise()` has grouped output by '.draw', 'y_num', 'age'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
slope_summary_age_sex <- slope_draws_age_sex %>%
group_by(age, sex) %>%
median_qi(estimate) %>%
mutate(measure = "RR.slope")
Numerical summary of these summary results
bind_rows(
peak_summary_age_sex, level_summary_age_sex, slope_summary_age_sex
) %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
As a figure
peak_g_age_sex <- peak_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#level plot
level_g_age_sex <- level_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#slope plot
slope_g_age_sex <- slope_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
counterfact_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(year, age, sex, .draw, .epred_counterf = .epred)
Adding missing grouping variables: `year2`, `y_num`, `acf_period`, `.row`
#Calcuate predicted number of cases per draw, then summarise.
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row`
`summarise()` has grouped output by 'age', 'sex'. You can override using the `.groups` argument.
#Calcuate incidence per draw, then summarise.
post_change_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(.draw, age, sex) %>%
summarise(.epred = sum(.epred))
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row`
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
counter_post_overall_age_sex <-
left_join(counterfact_overall_age_sex, post_change_overall_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
Joining with `by = join_by(age, sex, .draw)`
age_sex_txt <- counter_post_overall_age_sex %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
transmute(year = as.character(year),
sex = sex,
age = age,
cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
NA
NA
counterfactual_g_age_sex <- counter_post_overall_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(x = age, y=cases_averted, ymin=cases_averted.lower, ymax=cases_averted.upper, colour=sex, shape=sex), position=position_dodge(width=0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
scale_y_continuous(labels = comma) +
labs(x="",
y="Number (95% UI)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "bottom")
counterfactual_g_age_sex
Join together for Figure 3.
(peak_g_age_sex + level_g_age_sex) / (slope_g_age_sex + counterfactual_g_age_sex) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect") & theme(legend.position = "bottom")
ggsave(here("figures/f3.tiff"), width = 12, height=8)
NA
NA